* Turn off -more- pauses for this and future sessions so the run is not interrupted
set more off, permanently

/* Directories: read-only source data */
global orig_data       "E:\projects\Rank\NewFilesReleased\"
global orig_data_thecb "E:\projects\Rank\NewFilesReleased\THECB\"
global orig_data_tea   "E:\projects\Rank\NewFilesReleased\TEA\"

/* Directories: outputs of this project (each path ends in a backslash,
   so file names are appended directly after the global) */
global work_dir "E:\Rank\workfiles\"
global ui_dir   "E:\Rank\temp\"
global logfiles "E:\Rank\logfiles\"
global dofiles  "E:\Rank\dofiles\"
global results  "E:\Rank\results\"


**************
*8th Grade
**************

*This section reads in Test Scores for 8th Graders
*TAAS happened from 1994-2003

* Grade 8 TAAS, 1994-2002: for each year build one student-by-subject file
* holding a statewide and a within-campus rank measure for math (subject 1)
* and reading (subject 2), saved as grade8_testing_YYYY.
foreach yy in "94" "95" "96" "97" "98" "99" "00" "01" "02" {

    * 1994-1999 inputs sit in a 19yy folder with an au file tag;
    * 2000-2002 inputs sit in a 20yy folder with an sp file tag
    if inlist(`yy',94,95,96,97,98,99) {
        use district campus grade rscode mscode rawmth rawred rawwrt ID2 ///
            using ${orig_data}TAASandTAKSandSTAAR\19`yy'\g8_repf_au`yy'.dta, clear
    }
    else {
        use district campus grade rscode mscode rawmth rawred rawwrt ID2 ///
            using ${orig_data}TAASandTAKSandSTAAR\20`yy'\g8_repf_sp`yy'.dta, clear
    }

    rename *, lower

    * Only rows whose math and reading score codes are both S are retained
    keep if mscode == "S"
    keep if rscode == "S"

    * id2 is the key used for every later merge, so rows without it are unusable
    drop if missing(id2)

    * A raw score of zero is recoded to missing
    replace rawmth = . if rawmth == 0
    replace rawred = . if rawred == 0

    * One row per id2. rawmth is a string during the sort, so the surviving row
    * is the first one in string order of rawmth; it is numeric again afterwards
    tostring rawmth, replace
    bys id2 (rawmth): keep if _n == 1
    destring rawmth, replace

    keep district campus grade rscode mscode rawmth rawred rawwrt id2

    /* Subject codes used after the reshape
       Math  1
       Read  2
       Write 3 (no rank is built for writing in this loop)
    */

    * Statewide measure: 1 - (rank - 1)/(N - 1), where rank rises with the raw score
    egen morder = rank(rawmth)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    egen rorder = rank(rawred)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    * Same measure within campus; num_class is the number of rows on the campus
    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(rawmth), by(campus)
    egen rorder_class = rank(rawred), by(campus)

    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    * One row per student and subject
    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    * Suffix every column with _8, then give the two merge keys their names back
    foreach col of varlist _all {
        rename `col' `col'_8
    }
    rename id2_8 id2
    rename subject_8 subject

    * Four-digit test year and output file name
    if inlist(`yy',94,95,96,97,98,99) {
        gen enr8_year = 19`yy'
        save ${work_dir}grade8_testing_19`yy', replace
    }
    else {
        gen enr8_year = 20`yy'
        save ${work_dir}grade8_testing_20`yy', replace
    }
}



*TAKS begins
*The raw grade-8 files are named inconsistently across years.
*Each one is read, its variable names are lower-cased, and it is re-saved
*under the uniform name taks8_YYYY in the work directory.
*The source path of each year (relative to orig_data) is held in a local so
*that no backslash ever sits directly in front of a macro reference.
local rel2003 "TAASandTAKSandSTAAR\2003\taks8.dta"
local rel2004 "TAASandTAKSandSTAAR\2004\taks8_fy04.dta"
local rel2005 "TAASandTAKSandSTAAR\2005\taks8_fy05.dta"
local rel2006 "TAASandTAKSandSTAAR\2006\taks8_apr06.dta"
local rel2007 "TAASandTAKSandSTAAR\2007\taks8_apr07.dta"

forvalues yr = 2003/2007 {
    use ${orig_data}`rel`yr'', clear
    rename *, lower
    save ${work_dir}taks8_`yr', replace
}


* Grade 8 TAKS, 2003-2007: same construction as the TAAS years, reading the
* normalised taks8_YYYY files and saving grade8_testing_YYYY.
forvalues yr = 2003/2007 {

    use ${work_dir}taks8_`yr', clear

    * Only rows whose math and reading score codes are both S are retained
    keep if m_scode == "S"
    keep if r_scode == "S"

    drop if missing(id2)

    * A raw score of zero is recoded to missing. The reading score r_raw is the
    * one ranked below, so it is r_raw that must be cleaned here; the previous
    * code cleaned w_raw, which is discarded by the keep a few lines down.
    replace m_raw = . if m_raw == 0
    replace r_raw = . if r_raw == 0

    * One row per id2; m_raw is a string during the sort, so the surviving row
    * is the first one in string order of m_raw
    tostring m_raw, replace
    bys id2 (m_raw): keep if _n == 1
    destring m_raw, replace

    keep district campus grade r_scode m_scode m_raw r_raw id2

    * Statewide measure: 1 - (rank - 1)/(N - 1), rank rising with the raw score
    egen morder = rank(m_raw)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    egen rorder = rank(r_raw)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    * Same measure within campus; num_class is the number of rows on the campus
    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(m_raw), by(campus)
    egen rorder_class = rank(r_raw), by(campus)

    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    * One row per student and subject (1 = math, 2 = reading)
    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    * Suffix every column with _8, then give the two merge keys their names back
    foreach col of varlist _all {
        rename `col' `col'_8
    }
    rename id2_8 id2
    rename subject_8 subject

    gen enr8_year = `yr'
    save ${work_dir}grade8_testing_`yr', replace
}



*Math, 2008: the math test has its own file this year.
*Builds the statewide and within-campus math measures (subject 1 only).
use ${orig_data}TAASandTAKSandSTAAR\2008\taks8m_apr08.dta, clear
rename *, lower

drop if missing(id2)

* Only rows whose math score code is S are retained
keep if m_scode == "S"
keep district campus grade m_scode m_raw id2

* A raw score of zero is recoded to missing
replace m_raw = . if m_raw == 0

* One row per id2; m_raw is a string during the sort, so the surviving row is
* the first one in string order of m_raw
tostring m_raw, replace
bys id2 (m_raw): keep if _n == 1
destring m_raw, replace

* Statewide measure, then the same measure within campus
egen morder = rank(m_raw)
gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))
gen junk = 1
egen num_class = sum(junk), by(campus)
egen morder_class = rank(m_raw), by(campus)
gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))

* Only the _1 stubs exist, so the reshape yields subject == 1 for every row
reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

rename ordinal_rank_ ordinal_rank
rename state_perc_ state_perc

* Suffix every column with _8, then give the two merge keys their names back
foreach col of varlist _all {
    rename `col' `col'_8
}
rename id2_8 id2
rename subject_8 subject

gen enr8_year = 2008
save ${work_dir}grade8m_testing_2008, replace


*Reading, 2008: the reading test has its own file this year.
*Builds the reading measures (subject 2 only) and then stacks them on top of
*the 2008 math file to form grade8_testing_2008.
use ${orig_data}TAASandTAKSandSTAAR\2008\taks8r_mar08.dta, clear
rename *, lower

* Keeps a single row per id2 value
duplicates drop id2, force

* Only rows whose reading score code is S are retained
keep if r_scode == "S"

keep district campus grade r_scode r_raw id2

* A raw score of zero is recoded to missing
replace r_raw = . if r_raw == 0

* id2 is already unique at this point, so this step removes nothing further
tostring r_raw, replace
bys id2 (r_raw): keep if _n == 1
destring r_raw, replace

* Statewide measure, then the same measure within campus
egen rorder = rank(r_raw)
gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

gen junk = 1
egen num_class = sum(junk), by(campus)

egen rorder_class = rank(r_raw), by(campus)
gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

* Only the _2 stubs exist, so the reshape yields subject == 2 for every row
reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

rename ordinal_rank_ ordinal_rank
rename state_perc_ state_perc

* Suffix every column with _8, then give the two merge keys their names back
foreach col of varlist _all {
    rename `col' `col'_8
}
rename id2_8 id2
rename subject_8 subject

gen enr8_year = 2008
save ${work_dir}grade8r_testing_2008, replace

* Reading rows first, math rows appended below them
use ${work_dir}grade8r_testing_2008, clear
append using ${work_dir}grade8m_testing_2008

save ${work_dir}grade8_testing_2008, replace


* Grade 8 TAKS, 2009: math and reading come in separate files. Each is
* processed on its own and the two results are stacked into
* grade8_testing_2009.
foreach yy in "09" {

    * ---- Math ----
    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks8m_apr`yy'.dta, clear
    rename *, lower
    duplicates drop id2, force
    keep if m_scode == "S"

    * Intermediate copy of the filtered math file
    save ${work_dir}test8m_apr`yy', replace

    keep district campus grade m_scode m_raw id2

    * A raw score of zero is recoded to missing
    replace m_raw = . if m_raw == 0

    * id2 is already unique here, so this step removes nothing further
    tostring m_raw, replace
    bys id2 (m_raw): keep if _n == 1
    destring m_raw, replace

    * Statewide measure, then the same measure within campus
    egen morder = rank(m_raw)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(m_raw), by(campus)
    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    foreach col of varlist _all {
        rename `col' `col'_8
    }
    rename id2_8 id2
    rename subject_8 subject

    gen enr8_year = 20`yy'
    save ${work_dir}grade8m_testing_20`yy', replace

    * ---- Reading ----
    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks8r_mar`yy'.dta, clear
    rename *, lower
    duplicates drop id2, force

    keep if r_scode == "S"

    keep district campus grade r_scode r_raw id2

    replace r_raw = . if r_raw == 0
    tostring r_raw, replace
    bys id2 (r_raw): keep if _n == 1
    destring r_raw, replace

    egen rorder = rank(r_raw)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen rorder_class = rank(r_raw), by(campus)

    * Same formula as every other within-campus measure in this file. The
    * previous code used 1 - rorder_class/num_class here, which put the 2009
    * reading measure on a different scale from math and from all other years.
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    foreach col of varlist _all {
        rename `col' `col'_8
    }
    rename id2_8 id2
    rename subject_8 subject

    gen enr8_year = 20`yy'
    save ${work_dir}grade8r_testing_20`yy', replace

    * ---- Stack reading and math ----
    use ${work_dir}grade8r_testing_20`yy', clear
    append using ${work_dir}grade8m_testing_20`yy'
    save ${work_dir}grade8_testing_20`yy', replace
}

*2010-2011
* Grade 8 TAKS: math and reading are back in one combined file per year.
foreach yy in "10" "11" {

    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks8rm_apr`yy'.dta, clear
    rename *, lower
    duplicates drop id2, force

    * Harmless if the variable is absent; it is not kept below either way
    cap rename ethnicity ethnic

    * Only rows whose math and reading score codes are both S are retained
    keep if m_scode == "S"
    keep if r_scode == "S"

    keep district campus grade m_scode m_raw r_scode r_raw id2

    * A raw score of zero is recoded to missing for BOTH subjects, as in the
    * other years; the reading recode was previously absent from this loop.
    replace m_raw = . if m_raw == 0
    replace r_raw = . if r_raw == 0

    * id2 is already unique here, so this step removes nothing further
    tostring m_raw, replace
    bys id2 (m_raw): keep if _n == 1
    destring m_raw, replace

    * Statewide measure: 1 - (rank - 1)/(N - 1), rank rising with the raw score
    egen morder = rank(m_raw)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    egen rorder = rank(r_raw)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    * Same measure within campus; num_class is the number of rows on the campus
    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(m_raw), by(campus)
    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))

    egen rorder_class = rank(r_raw), by(campus)
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    * One row per student and subject (1 = math, 2 = reading)
    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    foreach col of varlist _all {
        rename `col' `col'_8
    }
    rename id2_8 id2
    rename subject_8 subject

    gen enr8_year = 20`yy'
    save ${work_dir}grade8_testing_20`yy', replace
}


*********************************************
*3rd Grade
*********************************************

* Grade 3 TAAS, 1994-1999: one student-by-subject file per year with the
* statewide and within-campus measures, saved as grade3_testing_19yy.
forvalues yy = 94/99 {

    use ${orig_data}TAASandTAKSandSTAAR\19`yy'\g3_repf_au`yy'.dta, clear
    rename *, lower

    * Score-code filter first, duplicate removal second, matching the order used
    * by every other test loop in this file. Previously the duplicate removal
    * ran first, so a student whose surviving row did not have score code S was
    * dropped even when another of their rows did.
    keep if mscode == "S"
    keep if rscode == "S"

    drop if missing(id2)

    * A raw score of zero is recoded to missing
    replace rawmth = . if rawmth == 0
    replace rawred = . if rawred == 0

    * One row per id2; rawmth is a string during the sort, so the surviving row
    * is the first one in string order of rawmth
    tostring rawmth, replace
    bys id2 (rawmth): keep if _n == 1
    destring rawmth, replace

    keep district campus grade year month sex ethnic disadv leprof ///
        rscode wscode mscode rawmth rawred rawwrt id2

    /* Subject codes used after the reshape
       Math 1
       Read 2
    */

    * Statewide measure: 1 - (rank - 1)/(N - 1), rank rising with the raw score
    egen morder = rank(rawmth)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    egen rorder = rank(rawred)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    * Same measure within campus; num_class is the number of rows on the campus
    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(rawmth), by(campus)
    egen rorder_class = rank(rawred), by(campus)

    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    * Four-digit test year; becomes stu_year_3 in the rename loop below
    gen stu_year = 19`yy'

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    * Suffix every column with _3, then give the two merge keys their names back
    foreach col of varlist _all {
        rename `col' `col'_3
    }
    rename id2_3 id2
    rename subject_3 subject

    * Two-digit year, created after the rename loop so it carries no suffix
    gen school_year = `yy'

    save ${work_dir}grade3_testing_19`yy', replace
}

* Grade 3 TAAS, 2000-2002: same construction as the 1990s files, reading the
* spring (sp) files and saving grade3_testing_20yy.
foreach yy in "00" "01" "02" {

    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\g3_repf_sp`yy'.dta, clear
    rename *, lower

    * Only rows whose math and reading score codes are both S are retained
    keep if mscode == "S"
    keep if rscode == "S"

    drop if missing(id2)

    * A raw score of zero is recoded to missing
    replace rawmth = . if rawmth == 0
    replace rawred = . if rawred == 0

    * One row per id2; rawmth is a string during the sort, so the surviving row
    * is the first one in string order of rawmth
    tostring rawmth, replace
    bys id2 (rawmth): keep if _n == 1
    destring rawmth, replace

    keep district campus grade year month sex ethnic disadv leprof ///
        rscode wscode mscode rawmth rawred rawwrt id2

    /* Subject codes used after the reshape
       Math  1
       Read  2
       Write 3 (no rank is built for writing in this loop)
    */

    * Statewide measure: 1 - (rank - 1)/(N - 1), rank rising with the raw score
    egen morder = rank(rawmth)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    egen rorder = rank(rawred)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    * Same measure within campus; num_class is the number of rows on the campus
    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen morder_class = rank(rawmth), by(campus)
    egen rorder_class = rank(rawred), by(campus)

    gen ordinal_rank_1 = 1 - ((morder_class - 1)/(num_class - 1))
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    * Four-digit test year; becomes stu_year_3 in the rename loop below
    gen stu_year = 20`yy'

    foreach col of varlist _all {
        rename `col' `col'_3
    }
    rename id2_3 id2
    rename subject_3 subject

    * Two-digit year as a number, created after the rename loop so it has no suffix
    gen school_year = `yy'

    save ${work_dir}grade3_testing_20`yy', replace
}
*******************************************

* Grade 3 TAKS raw files, 2003-2007: re-save each administration with
* lower-case variable names under the uniform names taks3_march_YYYY and
* taks3_april_YYYY.

* 2003: file names carry only the month
foreach mo in march april {
    use ${orig_data}TAASandTAKSandSTAAR\2003\taks3_`mo'.dta, clear
    rename *, lower
    save ${work_dir}taks3_`mo'_2003, replace
}

* 2004-2005: file names carry the month and a fiscal-year tag
foreach yy in "04" "05" {
    foreach mo in march april {
        use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks3_`mo'_fy`yy'.dta, clear
        rename *, lower
        save ${work_dir}taks3_`mo'_20`yy', replace
    }
}

* 2006-2007: the first file is tagged feb and is stored under the march name
foreach yy in "06" "07" {
    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks3eng_feb`yy'.dta, clear
    rename *, lower
    save ${work_dir}taks3_march_20`yy', replace

    use ${orig_data}TAASandTAKSandSTAAR\20`yy'\taks3eng_apr`yy'.dta, clear
    rename *, lower
    save ${work_dir}taks3_april_20`yy', replace
}



* Grade 3 TAKS, 2003-2007. The March file supplies reading (subject 2) and the
* April file supplies math (subject 1). Each is processed separately and the
* two are stacked into grade3_testing_YYYY.
forvalues yr = 2003/2007 {

    * ---- March: reading only ----
    use ${work_dir}taks3_march_`yr', clear

    rename *, lower
    keep if r_scode == "S"

    drop if missing(id2)
    replace r_raw = . if r_raw == 0

    * One row per id2, keeping the first row in sort order of r_raw.
    * Unlike the grade 8 loops there is no tostring before this sort.
    bys id2 (r_raw): keep if _n == 1
    destring r_raw, replace

    keep district campus grade year month sex ethnic disadv leprof ///
        r_scode r_raw id2

    * Statewide measure, then the same measure within campus
    egen rorder = rank(r_raw)
    gen state_perc_2 = 1 - ((rorder - 1)/(_N - 1))

    gen junk = 1
    egen num_class = sum(junk), by(campus)

    egen rorder_class = rank(r_raw), by(campus)
    gen ordinal_rank_2 = 1 - ((rorder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    save ${work_dir}grade3_testing_r_`yr', replace

    * ---- April: math ----
    use ${work_dir}taks3_april_`yr'.dta, clear

    rename *, lower
    keep if m_scode == "S"

    drop if missing(id2)
    replace m_raw = . if m_raw == 0
    bys id2 (m_raw): keep if _n == 1
    destring m_raw, replace

    keep district campus grade year month sex ethnic disadv leprof ///
        m_scode m_raw id2

    egen morder = rank(m_raw)
    gen state_perc_1 = 1 - ((morder - 1)/(_N - 1))

    gen junk = 1
    egen num_class = sum(junk), by(campus)

    * The within-campus math rank is stored under the name rorder_class, the
    * same column name the reading file uses, so the two share it after the append
    egen rorder_class = rank(m_raw), by(campus)
    gen ordinal_rank_1 = 1 - ((rorder_class - 1)/(num_class - 1))

    reshape long ordinal_rank_ state_perc_, i(id2) j(subject)

    rename ordinal_rank_ ordinal_rank
    rename state_perc_ state_perc

    save ${work_dir}grade3_testing_m_`yr', replace

    * ---- Stack math and reading ----
    append using ${work_dir}grade3_testing_r_`yr'
    gen stu_year = `yr'

    * Suffix every column with _3, then give the two merge keys their names back
    foreach col of varlist _all {
        rename `col' `col'_3
    }
    rename id2_3 id2
    rename subject_3 subject
    save ${work_dir}grade3_testing_`yr', replace
}

**************************
*High School Graduation
**************************

* One file per graduation year 2001-2016 with a graduation flag and three
* diploma-type flags per student, saved as p_graduate20yy with every column
* except id2 prefixed by grad_.
foreach yy in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" {

    use ${orig_data}TEA\p_graduate`yy', clear
    rename *, lower

    * Keeps a single row per id2 value
    duplicates drop id2, force

    * Every student present in the file is flagged as a graduate
    gen graduated_hs = 1

    * Indicators for the type of diploma, defined by lists of gradtype codes
    gen distinguished_hs = 0
    replace distinguished_hs = 1 if inlist(gradtype,"09","16","17","20","26","23")

    gen minimum_hs = 0
    replace minimum_hs = 1 if inlist(gradtype,"11","12","13","18","21","24")

    gen recommended_hs = 0
    replace recommended_hs = 1 if inlist(gradtype,"10","14","15","19","22","25")

    keep id2 campus district ///
        recommended_hs minimum_hs distinguished_hs graduated_hs

    * Year of HS graduation, taken from the file name
    gen gradhs_year = 20`yy'

    * Prefix so these columns cannot collide with test-file columns when merged
    foreach col in district campus graduated_hs ///
        distinguished_hs minimum_hs recommended_hs gradhs_year {
        rename `col' grad_`col'
    }

    save ${work_dir}p_graduate20`yy', replace
}


**********************
*College Enrollment
**********************


* Four-year institutions: for every fiscal year 2001-2017 and each of the two
* terms (sprg, fall) build one row per student describing the institution at
* which the student carries the most hours, saved as enr_4_TERM_20yy.
foreach yy in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" {

    foreach term in "sprg" "fall" {

        * The id column is requested as ID2 for fiscal years 01-08 and as id2 afterwards
        if inlist(`yy',01,02,03,04,05,06,07,08) {
            use ID2 stufice stuyear stusem stucip stuage stutotsch using ///
                ${orig_data_thecb}u_cbm001_fy`yy'_`term', clear
        }
        else {
            use id2 stufice stuyear stusem stucip stuage stutotsch using ///
                ${orig_data_thecb}u_cbm001_fy`yy'_`term', clear
        }
        rename *, lower

        * Restrict to students younger than 22
        destring stuage, replace
        keep if stuage < 22
        drop stuage

        destring stutotsch, replace

        * This is intended to identify the primary institution a student is
        * enrolled in: the row with the most hours, ties broken by lowest stufice.
        * sumhours is the total of stutotsch over all of the student rows.
        egen long maxhours = max(stutotsch), by(id2)
        egen long sumhours = sum(stutotsch), by(id2)

        keep if stutotsch == maxhours
        bys id2 (stufice): keep if _n == 1

        * Mark the columns as four-year
        foreach col in stufice stusem stuyear stucip sumhours {
            cap rename `col' `col'_4yr
        }

        gen enroll_yr = 20`yy'

        drop maxhours stutotsch

        gen enr4yr = 1

        * Mark the columns with the term so fall and spring can sit side by side
        foreach col in stufice_4yr stucip_4yr stusem_4yr ///
            stuyear_4yr enroll_yr enr4yr sumhours_4yr {
            rename `col' `col'_`term'
        }

        save ${work_dir}enr_4_`term'_20`yy', replace
    }
}


* Four-year institutions: combine the fall and spring files of each year into
* one row per student, saved as enr_4_all_YYYY.
local enr4_cols stufice_4yr_fall stusem_4yr_fall stuyear_4yr_fall ///
    stucip_4yr_fall enroll_yr_fall enr4yr_fall stufice_4yr_sprg ///
    stusem_4yr_sprg stuyear_4yr_sprg stucip_4yr_sprg enroll_yr_sprg ///
    enr4yr_sprg sumhours_4yr_fall sumhours_4yr_sprg

forvalues yr = 2001/2017 {

    * Fall rows have only fall columns filled and spring rows only spring columns
    use ${work_dir}enr_4_fall_`yr', clear
    append using ${work_dir}enr_4_sprg_`yr'

    * collapse needs numeric inputs
    destring `enr4_cols', replace

    * The max over a student's rows picks up the filled value of each column
    collapse (max) `enr4_cols', by(id2)

    * Enrolled in either term
    gen enr4yr_all = (enr4yr_sprg == 1 | enr4yr_fall == 1)

    save ${work_dir}enr_4_all_`yr', replace
}

*2 year colleges
*Same construction as the 4yr code: one row per student, fiscal year and term,
*describing the institution where the student carries the most hours.
*Saved as enr_2_TERM_20yy.
foreach yy in "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" {

    foreach term in "sprg" "fall" {

        * The id column is requested as ID2 for fiscal years 01-08 and as id2 afterwards
        if inlist(`yy',01,02,03,04,05,06,07,08) {
            use ID2 stufice stuyear stusem stucip stuage stutotsch using ///
                ${orig_data_thecb}c_cbm001_fy`yy'_`term', clear
        }
        else {
            use id2 stufice stuyear stusem stucip stuage stutotsch using ///
                ${orig_data_thecb}c_cbm001_fy`yy'_`term', clear
        }
        rename *, lower

        * Restrict to students younger than 21
        * NOTE(review): the 4yr loop uses a cutoff of 22 - confirm the difference is intended
        destring stuage, replace
        keep if stuage < 21
        drop stuage

        destring stutotsch, replace

        * Row with the most hours per student, ties broken by lowest stufice;
        * sumhours is the total of stutotsch over all of the student rows
        egen maxhours = max(stutotsch), by(id2)
        egen sumhours = sum(stutotsch), by(id2)

        keep if stutotsch == maxhours
        bys id2 (stufice): keep if _n == 1

        drop maxhours stutotsch

        * Mark the columns as two-year
        foreach col in stufice stusem stuyear stucip sumhours {
            rename `col' `col'_2yr
        }

        gen enroll_yr = 20`yy'
        gen enr2yr = 1

        * Mark the columns with the term so fall and spring can sit side by side
        foreach col in stufice_2yr stucip_2yr stusem_2yr ///
            stuyear_2yr enroll_yr enr2yr sumhours_2yr {
            rename `col' `col'_`term'
        }

        save ${work_dir}enr_2_`term'_20`yy', replace
    }
}


* Two-year institutions: combine the fall and spring files of each year into
* one row per student, saved as enr_2_all_YYYY.
local enr2_cols stufice_2yr_fall stusem_2yr_fall stuyear_2yr_fall ///
    stucip_2yr_fall enroll_yr_fall enr2yr_fall stufice_2yr_sprg ///
    stusem_2yr_sprg stuyear_2yr_sprg stucip_2yr_sprg enroll_yr_sprg ///
    enr2yr_sprg sumhours_2yr_sprg sumhours_2yr_fall

forvalues yr = 2001/2017 {

    * Fall rows have only fall columns filled and spring rows only spring columns
    use ${work_dir}enr_2_fall_`yr', clear
    append using ${work_dir}enr_2_sprg_`yr'

    * collapse needs numeric inputs
    destring `enr2_cols', replace

    * The max over a student's rows picks up the filled value of each column
    collapse (max) `enr2_cols', by(id2)

    * Enrolled in either term
    gen enr2yr_all = (enr2yr_sprg == 1 | enr2yr_fall == 1)

    save ${work_dir}enr_2_all_`yr', replace
}

***************************
*College Graduation
***************************

* Stack the degree files for fiscal years 2001-2017 and keep, per student, the
* records of the earliest graduation date among rows with gradlev equal to 2.
* Output: one row per student in the file degrees.
use ${orig_data_thecb}u_cbm009_fy01.dta, clear

foreach yy in "02" "03" "04" "05" "06" "07" ///
    "08" "09" "10" "11" "12" "13" "14" "15" "16" "17" {
    append using ${orig_data_thecb}u_cbm009_fy`yy'.dta
}

* NOTE(review): names are lower-cased only after all files are stacked; if the
* yearly files differ in the case of a variable name this would need to happen
* per file instead - confirm against the raw data.
rename *, lower

keep if gradlev == "2"

keep id2 gradfice gradlev gradmaj gradmonth gradyear

* gradyear and gradmonth are strings here, so + concatenates them; the result
* is then converted to a number that can be compared across rows
gen gradyearmon = gradyear + gradmonth
destring gradyearmon, replace

* Earliest graduation date per student, and only the rows at that date
egen first_grad = min(gradyearmon), by(id2)
keep if gradyearmon == first_grad

* Number the remaining rows within student and spread institution and major
* across columns. The numbering follows whatever order rows have within id2.
bys id2: gen nvals = _n
reshape wide gradfice gradmaj, i(id2) j(nvals)

save ${work_dir}degrees, replace
 

*********************************
*Repeat the 3rd and 8th grade
*********************************

* File that measures when students were enrolled in grades 6-9.
* For each fall enrollment file keep one row per student among those enrolled
* in grade 06, 07, 08 or 09, suffix the columns with _enr8, and save the
* result as enroll_8_YYYY.

* 1994-1999
forvalues yy = 94/99 {
    use ${orig_data_tea}p_enroll_demog`yy'f, clear
    rename *, lower

    * Keeps a single row per id2 value, before the grade filter is applied
    duplicates drop id2, force

    keep if inlist(grade, "06", "07", "08", "09")

    keep id2 speced district campus grade

    foreach col of varlist _all {
        rename `col' `col'_enr8
    }
    rename id2_enr8 id2

    gen enroll8th_year = 19`yy'

    save ${work_dir}enroll_8_19`yy', replace
}

* 2000-2016
foreach yy in "00" "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "16" {
    use ${orig_data_tea}p_enroll_demog`yy'f, clear
    rename *, lower

    * Keeps a single row per id2 value, before the grade filter is applied
    duplicates drop id2, force

    keep if inlist(grade, "06", "07", "08", "09")

    keep id2 speced district campus grade

    foreach col of varlist _all {
        rename `col' `col'_enr8
    }
    rename id2_enr8 id2

    gen enroll8th_year = 20`yy'

    save ${work_dir}enroll_8_20`yy', replace
}

***********************************


*6th, 7th, 8th and 9th grade
* For each grade g, stack the 1994-2009 enrollment files, keep the rows in
* that grade, keep up to the first two enrollment years per student, and
* spread them across columns. Output: enroll6_all, enroll7_all, enroll8_all,
* enroll9_all, one row per student.
* NOTE(review): enroll_8_YYYY files exist through 2016 but only 1994-2009 are
* stacked here - confirm that the later years are meant to be left out.
foreach g in 6 7 8 9 {

    use ${work_dir}enroll_8_1994, clear
    forvalues yr = 1995/2009 {
        append using ${work_dir}enroll_8_`yr'
    }

    keep if grade_enr8 == "0`g'"

    * Swap the generic 8 tag for the grade number plus an underscore, so the
    * reshape below yields names ending in g_1 and g_2
    foreach stub in district_enr campus_enr grade_enr speced_enr {
        rename `stub'8 `stub'`g'_
    }

    * The year column already has the right name when g is 8
    if `g' != 8 {
        rename enroll8th_year enroll`g'th_year
    }

    * Order the student rows by year and keep the first two.
    * The short names below resolve to the renamed columns by abbreviation.
    bys id2 grade_enr (enroll`g'th_year): gen n = _n

    keep if n <= 2

    reshape wide district_enr campus_enr grade_enr ///
        speced_enr enroll`g'th_year, i(id2) j(n)

    save ${work_dir}enroll`g'_all, replace
}


*Repeat the 3rd Grade

*File that measures when students were enrolled in grades 3-4.
*For each fall enrollment file 1994-2009 keep one row per student among those
*enrolled in grade 03 or 04, suffix the columns with _enr3, and save the
*result as enroll_3_YYYY.
foreach yy in "94" "95" "96" "97" "98" "99" ///
    "00" "01" "02" "03" "04" "05" "06" ///
    "07" "08" "09" {

    use ${orig_data_tea}p_enroll_demog`yy'f, clear
    rename *, lower

    * Keeps a single row per id2 value, before the grade filter is applied
    duplicates drop id2, force

    keep if grade == "03" | grade == "04"

    keep id2 speced district campus grade

    foreach col of varlist _all {
        rename `col' `col'_enr3
    }

    * Use the full name of the suffixed key. The previous code wrote id2_enr,
    * which only worked through variable-name abbreviation.
    rename id2_enr3 id2

    * Four-digit year and output file name
    if inlist(`yy',94,95,96,97,98,99) {
        gen enroll3rd_year = 19`yy'
        save ${work_dir}enroll_3_19`yy', replace
    }
    else {
        gen enroll3rd_year = 20`yy'
        save ${work_dir}enroll_3_20`yy', replace
    }
}

*Figures out first enrollment
*in 3rd and 4th grade: up to the first two enrollment years per student in
*each grade, spread across columns, one row per student.

*3rd
use ${work_dir}enroll_3_1994, clear
forvalues yr = 1995/2009 {
    append using ${work_dir}enroll_3_`yr'
}

keep if grade_enr3 == "03"

* A trailing underscore is added to every column, including the year, so the
* reshaped names end in _1 and _2
rename district_enr3 district_enr3_
rename campus_enr3 campus_enr3_
rename grade_enr3 grade_enr3_
rename speced_enr3 speced_enr3_
rename enroll3rd_year enroll3rd_year_

* Order the student rows by year and keep the first two.
* The short names below resolve to the renamed columns by abbreviation.
bys id2 grade_enr (enroll3rd_year): gen n = _n

keep if n <= 2

reshape wide district_enr campus_enr grade_enr ///
    speced_enr enroll3rd_year, i(id2) j(n)

save ${work_dir}enroll3_all, replace

*4th
use ${work_dir}enroll_3_1994, clear
forvalues yr = 1995/2009 {
    append using ${work_dir}enroll_3_`yr'
}

keep if grade_enr3 == "04"

* The 3 tag becomes 4 plus an underscore; the year column gets no underscore,
* so its reshaped names end in year1 and year2
rename district_enr3 district_enr4_
rename campus_enr3 campus_enr4_
rename grade_enr3 grade_enr4_
rename speced_enr3 speced_enr4_

rename enroll3rd_year enroll4th_year

bys id2 grade_enr (enroll4th_year): gen n = _n

keep if n <= 2

reshape wide district_enr campus_enr grade_enr ///
    speced_enr enroll4th_year, i(id2) j(n)

save ${work_dir}enroll4_all, replace



**************************************
*AP
**************************************

* For every year 1994-2017 count, per student, the completed courses in each
* category below (rows with course_result equal to 1). Saved as coursesYYYY.
forvalues y = 1994/2017 {
    di "Starting to process courses for `y'"

    * The input file name ends in the two-digit year, including a leading zero
    local j = substr("`y'", 3, 2)

    use ${orig_data}TEA\p_course_complete`j', clear
    rename *, lower

    drop if id2 == ""
    keep if course_result == "1"

    **Coding for various course types, keyed on the service code.
    **The flags must be created in this order: later statements refer to the
    **whole run of columns as alg1-rem_alg.
    *Algebra 1
    gen byte alg1 = inlist(service, "03100500", "03100505", "03100507")
    *Algebra 2
    gen byte alg2 = inlist(service, "03100600", "03100605", "03100607")
    *Geometry
    gen byte geom = inlist(service, "03100700", "03100705", "03100707")
    *Precalculus
    gen byte precalc = (service == "03101100")
    *Calculus
    gen byte calc = inlist(service, "A3100100", "A3100101", "A3100102", "N1110018")
    *AP: all courses whose code starts with A3
    gen byte ap = (substr(service, 1, 2) == "A3")

    gen byte bio = (service == "03010200")
    gen byte chem = (service == "03040000")
    gen byte physics = (service == "03050000")

    gen byte ap_calcbc = (service == "A3100102")
    gen byte ap_calcab = (service == "A3100101")
    gen byte ap_ushist = (service == "A3340100")
    gen byte ap_eng = inlist(service, "A3220100", "A3220200")
    gen byte ap_sci = inlist(service, "A3010200", "A3040000", "A3050002", "A3050003", "A3050004")

    * Any course whose code starts with 034
    gen languages = strmatch(service, "034*")

    gen byte rem_alg = inlist(service, "03100505", "03100507")

    * Drop course rows that fall in none of the categories
    keep id2 alg1-rem_alg
    gen byte keep = 0
    foreach flag of varlist alg1-rem_alg {
        replace keep = 1 if `flag' == 1
    }
    keep if keep == 1

    * Number of completed courses per category and student
    collapse (sum) alg1-rem_alg, by(id2)

    save ${work_dir}courses`y', replace
}


* Stack the yearly course counts for 1999-2017 and total them per student.
* The 1994-1998 yearly files are not part of this total.
use ${work_dir}courses1999, clear

forvalues yr = 2000/2017 {
    append using ${work_dir}courses`yr'
}

local course_cols alg1 alg2 geom precalc calc ap bio chem physics ap_calcbc ///
    ap_calcab ap_ushist ap_eng ap_sci languages rem_alg

collapse (sum) `course_cols', by(id2)

save ${work_dir}courses_all, replace


*************************************************************
*Preps Earnings
*************************************************************

**** For each quarter, total the wage records of each person
forvalues yr = 2000/2016 {
    forvalues qtr = 1/4 {
        use ${orig_data}TWC\ui`yr'`qtr', clear

        rename *, lower

        collapse (sum) twcwage, by(id2)

        save ${work_dir}ui`yr'`qtr', replace
    }
}

**** Adding the quarterly wages up to the year level
forvalues yr = 2000/2016 {

    * Stack the four quarterly files of the year
    use ${work_dir}ui`yr'1, clear
    forvalues qtr = 2/4 {
        append using ${work_dir}ui`yr'`qtr'
    }

    replace twcwage = 0 if missing(twcwage)

    * One row per person with the wage total of the year
    collapse (sum) twcwage, by(id2)

    save ${work_dir}ui`yr', replace
}

**** DEFLATING WAGES
* Converts the yearly wage totals for 2001-2016 into 2012 dollars and saves
* them as uiYYYY_real with the single wage column realwage.
forvalues yr = 2001/2016 {

    * clear is given explicitly so the load cannot fail on unsaved data in memory
    use ${work_dir}ui`yr', clear

    *CPI taken from www.bls.gov/regions/midwest/data/consumerpriceindexhistorical_us_table.pdf
    *CPI-U All urban consumers
    gen CPI = .
    replace CPI = 245.120 if `yr'==2017
    replace CPI = 240.007 if `yr'==2016
    replace CPI = 237.017 if `yr'==2015
    replace CPI = 236.736 if `yr'==2014
    replace CPI = 232.957 if `yr'==2013
    replace CPI = 229.524 if `yr'==2012
    replace CPI = 223.467 if `yr'==2011
    replace CPI = 217.631 if `yr'==2010
    replace CPI = 212.709 if `yr'==2009
    replace CPI = 213.528 if `yr'==2008
    replace CPI = 205.352 if `yr'==2007
    replace CPI = 199.800 if `yr'==2006
    replace CPI = 193.300 if `yr'==2005
    replace CPI = 187.400 if `yr'==2004
    replace CPI = 184.200 if `yr'==2003
    replace CPI = 178.800 if `yr'==2002
    replace CPI = 176.200 if `yr'==2001

    gen Index    = 229.524/CPI   // BASE YEAR: 2012
    gen realwage = Index*twcwage

    drop CPI Index twcwage

    * work_dir already ends in a backslash, so the file name follows it
    * directly; this is the same spelling the later merges use to read the file
    save ${work_dir}ui`yr'_real, replace
}


***************************************
*3rd grade cohort Size
***************************************

* Number of grade 03 enrollment rows per campus and year, saved as 3rd_cohortsize
use ${work_dir}enroll_3_1994, clear
forvalues yr = 1995/2009 {
    append using ${work_dir}enroll_3_`yr'
}

keep if grade_enr3 == "03"

* Each row counts once; the sum per cell is the cohort size N
gen N = 1

collapse (sum) N, by(enroll3rd_year campus_enr3)

save ${work_dir}3rd_cohortsize, replace


*************************************************************
*************************************************************
*************************************************************


*Merging outcomes together



*************************************************************
*************************************************************
*************************************************************

* For every grade 3 test cohort 1994-2006, attach the later outcomes to the
* grade 3 test file and save the result as estimating_1_YYYY:
*   - grade 8 test results and grade 6-9 enrollment five years later
*   - high school graduation 9, 10 and 11 years later (suffixes _1 _2 _3)
*   - college enrollment 9, 10, 11 and 12 years later (suffixes _0 _1 _2 _3)
*   - real wages 15, 16, 17 and 18 years later (real_wage_15 ... real_wage_18)
* An outcome year is only merged when its file exists, that is before 2017.
* Cohorts 2007-2008 cannot be done until the extra education years are added.
forval i=1994/2006{

    * use takes clear, not replace
    local j = `i' + 5
    use ${work_dir}grade3_testing_`i', clear

    *Merge in Grade 8 Tests
    merge 1:1 id2 subject using ${work_dir}grade8_testing_`j', keep(1 3)

    gen missing_grade8test = 0
    replace missing_grade8test = 1 if _merge == 1

    drop _merge

    * Found in the grade 6-9 enrollment file five years after the grade 3 test
    merge m:1 id2 using ${work_dir}enroll_8_`j', keep(1 3)

    gen enr_5yearslater = 0
    replace enr_5yearslater = 1 if _merge == 3

    drop _merge

    ********************************************
    *Merging in HS Grad for 3 Years
    *3 years to account for retention
    * _1 is the cohort year plus 9, _2 is the year after
    * _3 is 2 years after _1
    ********************************************
    local sfx = 0
    forvalues offset = 9/11 {
        local ++sfx
        local gyear = `i' + `offset'

        if `gyear' < 2017 {
            merge m:1 id2 using ${work_dir}p_graduate`gyear'

            drop if _merge == 2

            * Students absent from the graduation file get zeros on the flags
            replace grad_graduated_hs = 0 if missing(grad_graduated_hs)
            replace grad_distinguished_hs = 0 if missing(grad_distinguished_hs)
            replace grad_recommended_hs = 0 if missing(grad_recommended_hs)
            replace grad_minimum_hs = 0 if missing(grad_minimum_hs)
            drop _merge

            * Suffix the columns so the next graduation year can be merged
            foreach v in grad_district grad_campus ///
                grad_graduated_hs grad_distinguished_hs grad_minimum_hs ///
                grad_recommended_hs grad_gradhs_year {
                rename `v' `v'_`sfx'
            }
        }
    }

    ********************************************
    *Merging in College Enroll
    *4 years to account for retention
    * _0 is the cohort year plus 9, _1 is the year after,
    * _2 and _3 are 2 and 3 years after _0
    ********************************************
    * NOTE(review): the sumhours columns, and the enroll_yr columns of the
    * two-year file, are not in the rename list below. They therefore keep the
    * values of the first year merged, because merge does not overwrite columns
    * already present in memory. Confirm whether they should be suffixed too.
    local sfx = -1
    forvalues offset = 9/12 {
        local ++sfx
        local cyear = `i' + `offset'

        if `cyear' < 2017 {
            merge m:1 id2 using ${work_dir}enr_4_all_`cyear'
            drop if _merge == 2
            replace enr4yr_all = 0 if missing(enr4yr_all)
            drop _merge

            * The two-year file is read for the SAME year as the four-year
            * file. The previous code read the year before in the last of the
            * four copies of this section.
            merge m:1 id2 using ${work_dir}enr_2_all_`cyear'
            drop if _merge == 2
            replace enr2yr_all = 0 if missing(enr2yr_all)
            drop _merge

            foreach v in stufice_4yr_fall stusem_4yr_fall ///
                stuyear_4yr_fall stucip_4yr_fall enroll_yr_fall ///
                enr4yr_fall stufice_4yr_sprg stusem_4yr_sprg ///
                stuyear_4yr_sprg stucip_4yr_sprg enroll_yr_sprg ///
                enr4yr_sprg enr4yr_all stufice_2yr_fall stusem_2yr_fall ///
                stuyear_2yr_fall stucip_2yr_fall enr2yr_fall ///
                stufice_2yr_sprg stusem_2yr_sprg stuyear_2yr_sprg ///
                stucip_2yr_sprg enr2yr_sprg enr2yr_all {
                rename `v' `v'_`sfx'
            }
        }
    }

    ********************************************
    *Merging in real wages 15 to 18 years after the grade 3 test
    ********************************************
    forvalues offset = 15/18 {
        local wyear = `i' + `offset'

        if `wyear' < 2017 {
            merge m:1 id2 using ${work_dir}ui`wyear'_real, keep(1 3) nogen
            rename realwage real_wage_`offset'
        }
    }

    save ${work_dir}estimating_1_`i', replace
}




* Stack the yearly files 1994 through 2006 into a single dataset
use ${work_dir}estimating_1_1994, clear
forvalues yr=1995/2006 {
	append using ${work_dir}estimating_1_`yr'
}

*Merges in College Degrees
* Students without a degree record stay in the data; degree-only records go
merge m:1 id2 using ${work_dir}degrees
drop if _merge==2

* bachelors is 1 for students matched to a degree record, 0 otherwise
gen bachelors=(_merge==3)

destring gradyear , replace

* Years between the grade 3 test year and the degree year
gen yearstodegree=gradyear-stu_year_3

* Degree within 13, 15 or 17 years of the grade 3 year. Each indicator is
* defined (0 or 1) only for cohorts at or before the stated year and is left
* missing for later cohorts.
gen grad4in4y=(yearstodegree<=13 & !missing(gradyear) & stu_year<=2004) if stu_year_3<=2004
gen grad4in6y=(yearstodegree<=15 & !missing(gradyear)) if stu_year<=2002
gen grad4in8y=(yearstodegree<=17 & !missing(gradyear)) if stu_year<=2000


* One id per subject by year by grade 3 campus cell
egen sub_year_school=group(subject stu_year campus_3)

* Enrolled in a four-year (two-year) college in any of the three merged years
gen enr4yr_any3years=(enr4yr_all_1==1 | enr4yr_all_2==1 | enr4yr_all_3==1)
gen enr2yr_any3years=(enr2yr_all_1==1 | enr2yr_all_2==1 | enr2yr_all_3==1)

* High school graduation flagged in any of the three graduation records
gen evergrad_hs=(grad_graduated_hs_1==1 | grad_graduated_hs_2==1 | grad_graduated_hs_3==1)


* Reverse the direction of the percentile measures (x becomes 1-x)
foreach pct in ordinal_rank_3 ordinal_rank_8 state_perc_3 state_perc_8 {
	replace `pct'=1-`pct'
}

* Grade 3 and grade 4 enrollment histories. keep(1 3) retains every student
* already in memory and discards records found only in the enrollment files.
drop _merge
merge m:1 id2 using ${work_dir}enroll3_all, keep (1 3)

drop _merge
merge m:1 id2 using ${work_dir}enroll4_all, keep (1 3)


* take3rdtestontime is 1 when the grade 3 test year equals enroll3rd_year_1
* (presumably the first recorded grade 3 enrollment year; confirm against the
* file that builds enroll3_all)
g take3rdtestontime=0 
replace take3rdtestontime=1 if stu_year_3==enroll3rd_year_1 ///
& !missing(enroll3rd_year_1) & !missing(stu_year_3) 


* repeat3rd is 1 when the two recorded grade 3 years are consecutive.
* The guard used to test enroll3rd_year_2 twice and never enroll3rd_year_1;
* it now matches the grade 4 and grade 6-9 versions below.
gen repeat3rd=0
replace repeat3rd=1 if enroll3rd_year_2-enroll3rd_year_1==1 ///
& !missing(enroll3rd_year_1) & !missing(enroll3rd_year_2)

gen repeat4th=0
replace repeat4th=1 if enroll4th_year2-enroll4th_year1==1 ///
& !missing(enroll4th_year1) & !missing(enroll4th_year2)


* Same repeat flag for grades 6 to 9. The merged enrollment variables are only
* needed to build the flag and are dropped again inside the loop.
foreach x in 6 7 8 9{
	cap drop _merge
	merge m:1 id2 using ${work_dir}enroll`x'_all, keep (1 3)

	gen repeat`x'th=0
	replace repeat`x'th=1 if enroll`x'th_year2-enroll`x'th_year1==1 ///
	& !missing(enroll`x'th_year1) & !missing(enroll`x'th_year2)

	drop district_enr`x'* campus_enr`x'* speced_enr`x'* enroll`x'th_year* _merge
}


* Course-taking records: every student is kept, course data added where found
cap drop _merge 
merge m:1 id2 using ${work_dir}courses_all, keep(1 3)

* A missing course variable is recoded to 0
local course_vars alg1 alg2 geom precalc calc ap bio chem physics ///
ap_calcbc ap_calcab ap_ushist ap_eng ap_sci languages rem_alg
foreach c of local course_vars {
	replace `c'=0 if missing(`c')
}

* The cohort size file is keyed on the unsuffixed names: rename for the merge,
* then restore the _1 names afterwards
rename (enroll3rd_year_1 campus_enr3_1) (enroll3rd_year campus_enr3)

merge m:1 enroll3rd_year campus_enr3 ///
using  ${work_dir}3rd_cohortsize, keep (1 3) nogen

rename (enroll3rd_year campus_enr3) (enroll3rd_year_1 campus_enr3_1)

save ${work_dir}estimating, replace

* Reload the merged file. The documented option for use is clear; replace
* belongs to save.
use ${work_dir}estimating, clear


* Raw grade 3 scores: take the first source variable and fall back to the
* second one where the first is missing
gen raw_read_3=rawred_3
replace raw_read_3=r_raw_3 if raw_read_3==.

gen raw_math_3=m_raw_3
replace raw_math_3=rawmth_3 if raw_math_3==.

*Drop duplicates
* count is the number of rows per student. For students with more than two
* rows, drop the rows whose test year differs from enroll3rd_year_1.
sort id2 subject
by id2:gen count=_N
ta count

drop if stu_year_3!=enroll3rd_year_1 & count>2 &  enroll3rd_year_1!=.

* Tabulate rows per student again to inspect the result
drop count
sort id2 subject
by id2:gen count=_N
ta count
drop count

* Keep students whose subject codes sum to 3 (one row with subject 1 and one
* row with subject 2)
by id2:egen sum=sum(subject)
drop if sum!=3
drop sum

* From here on year_3 is the grade 3 test year
drop  school_year year_3
rename stu_year_3 year_3

drop r_scode_8 m_scode_8


*coding up variables
* NOTE(review): encode numbers the string categories in alphabetical order, so
* every shift and recode below depends on the exact set of strings found in
* the grade 3 files. Those strings are not visible in this section; confirm
* the mappings with a tabulate of each source variable before relying on them.

* male: encoded sex_3 shifted down by 2. Rows that end up at -1 (the first
* encoded category) are dropped.
encode sex_3, gen(male)
replace male=male-2
drop if male==-1
drop sex_3

* fsme: economic disadvantage status. Encoded category 1 becomes missing and
* categories 2 to 5 become 0, 1, 2 and 9, labelled below.
encode disadv_3, gen(fsme)
* Value labels for the recoded categories are defined right after the recode
recode fsme (1=.) (2=0) (3=1) (4=2) (5=9)
label define fsme 0 "Not Econ Dis" 1 "Free School Eligible" 2 "Reduce Price Eligible" 9 "Other Disadvantage",modify
label values fsme fsme
order fsme, after(disadv_3)
label var fsme "Free School Meal Eligible Yr3"
drop disadv_3

* fsme_dum: 1 for any disadvantage category, 0 for none, missing otherwise
gen fsme_dum=1 if fsme==1|fsme==2|fsme==9
replace fsme_dum=0 if fsme==0

* esl: limited English proficiency status. Encoded category 1 becomes missing
* and categories 2 to 6 become 0 to 4, labelled below.
encode leprof_3,gen(esl) 
* Value labels for the recoded categories are defined right after the recode
recode esl (1=.) (2=0) (3=1) (4=2) (5=3) (6=4)
label define esl 0 "Not LEP" 1 "LEP" 2 "C-Exited LEP" 3 "F-Exited LEP M1" 4 "S-Exited LEP M2" ,modify
label values esl esl
order esl, after(leprof_3)
label var esl "Limited English Proficency Yr3"
drop leprof_3

* esl_dum: 1 for any LEP category, 0 for Not LEP, missing otherwise
gen esl_dum=1 if esl==1|esl==2|esl==3|esl==4
replace esl_dum=0 if esl==0

* ethnic: encoded ethnic_3 shifted down by 1. The first encoded category
* becomes missing and shifted code 5 is moved to 0, the category labelled
* White, Non-Hispanic below.
encode ethnic_3, gen(ethnic)
replace ethnic=ethnic-1
replace ethnic=. if ethnic==0
recode ethnic (5=0)
label define ethnic 0 "White, Non-Hispanic" 1 "American Indian" 2 "Asain" 3 "Black, Non-Hispanic" 4 "Hispanic" ,modify
label values ethnic ethnic 
order ethnic, after(ethnic_3)
label var ethnic "Ethnicity Yr3"
drop ethnic_3

* ethnic_dum: 1 for categories 1 to 4, 0 for category 0, missing otherwise
gen ethnic_dum=1 if ethnic==1|ethnic==2|ethnic==3|ethnic==4
replace ethnic_dum=0 if ethnic==0


* Control variable list stored in a global macro
global controls male fsme_dum esl_dum ethnic_dum

* Log real wage 15 to 18 years after the cohort year (missing when the wage
* is missing or not positive)
forvalues h=15/18 {
	gen ln_real_wage_`h'=ln(real_wage_`h')
}

* Real wages with zeros: for cohorts with year_3 before 2017 minus the
* horizon, a missing wage is set to 0. Later cohorts stay missing.
forvalues h=15/18 {
	local first_excluded=2017-`h'
	gen real_wage0_`h'=cond(missing(real_wage_`h'),0,real_wage_`h') if year_3<`first_excluded'
}

* in_labor is 1 when the zero-filled wage is positive; it is defined for the
* same cohorts as the zero-filled wage
forvalues h=15/18 {
	local first_excluded=2017-`h'
	gen in_labor_`h'=(real_wage0_`h'>0) if year_3<`first_excluded'
}

* Mean over whichever of the four zero-filled wages are non-missing, and its log
egen mean_real_wage0_1518=rowmean(real_wage0_15 real_wage0_16 real_wage0_17 real_wage0_18)
gen ln_mean_real_wage0_1518=ln(mean_real_wage0_1518) 

* Group identifiers: grade 3 campus by cohort, with and without subject
egen y3_sch_coh=group(campus_3 year_3)
egen y3_sch_coh_sub=group(campus_3 year_3 subject )

* Grade 8 campus grouped with the original grade 3 cohort year ...
egen y8_sch_coh_original=group(campus_8 year_3)
label var y8_sch_coh_original "8th Grade School with original cohort - not accounting for retention"

* ... and with the year the grade 8 test was actually taken
egen y8_sch_coh=group(campus_8 enr8_year)
label var y8_sch_coh "8th Grade School with cohort who took same test - accounting for retention"

egen y8_sch_coh_sub=group(campus_8 enr8_year subject)
label var y8_sch_coh_sub "8th Grade School Subject with cohort who took same test - accounting for retention"


* ongrade: 1 when grade_enr8 is "08" or "09", 0 for any other non-empty
* value, missing when grade_enr8 is empty
gen ongrade=inlist(grade_enr8,"08","09") if grade_enr8!=""

* Row totals treat missing components as 0
egen evergrad_hs_dist=rowtotal(grad_distinguished_hs_1 grad_distinguished_hs_2 grad_distinguished_hs_3)
egen enrcollege=rowtotal(enr4yr_any3years enr2yr_any3years)


* Replace the class size carried in from the yearly files with row counts
* taken from the data now in memory
drop num_class_3
bys campus_3 year_3 subject: gen num_class_3_all=_N
label var num_class_3_all "Total obs by sch year subject Y3"
bys year_3 subject: gen num_cohort_3_all=_N
label var num_cohort_3_all "Total obs by year subject Y3"

* Same counts restricted to rows flagged take3rdtestontime
egen num_class_ontime=sum(take3rdtestontime), by(campus_3 year_3 subject)
label var num_class_ontime "Total obs by sch year subject, Ontime Y3"
egen num_cohort_ontime=sum(take3rdtestontime), by(year_3 subject)
label var num_cohort_ontime "Total obs by year subject, Ontime  Y3"

order y3_sch_coh y3_sch_coh_sub y8_sch_coh y8_sch_coh_sub num_cohort_3_all ///
	num_class_3_all num_cohort_ontime num_class_ontime, after(month_3)


* Copy raw grade 3 scores between a student's rows: a missing math score is
* taken from the previous row and a missing reading score from the next row
* (rows are sorted by subject within student)
sort id subject
by id: replace raw_math_3=raw_math_3[_n-1] if raw_math_3==. & raw_math_3[_n-1]!=.
by id: replace raw_read_3=raw_read_3[_n+1] if raw_read_3==. & raw_read_3[_n+1]!=.
label var raw_math_3 "Raw Math Score Y3"
label var raw_read_3 "Raw Reading Score Y3"

*Alternate Definitions for ALL
*On time students - mean rank
*All students - mean rank
*All students - bottom rank
*All students - random rank

********************************************************************************
*Test scores and rank based on ON TIME students 
********************************************************************************
* Ranks use rows with take3rdtestontime==1 only; egen rank() gives tied scores
* their average rank. Percentile = (rank-1)/(number of on-time rows - 1).
* The reading percentile used to reference st_rrank_3, an abbreviation that
* only resolves while st_rrank_3_ontime is the sole match; the full name is
* used now so the line does not depend on variable abbreviation.
egen st_rrank_3_ontime=rank(raw_read_3) if take3rdtestontime==1, by(year_3 subject)
gen st_prrank_3_ontime=((st_rrank_3_ontime-1)/(num_cohort_ontime-1))

egen st_mrank_3_ontime=rank(raw_math_3) if take3rdtestontime==1, by(year_3 subject)
gen st_pmrank_3_ontime=((st_mrank_3_ontime-1)/(num_cohort_ontime-1))

egen cl_rrank_3_ontime=rank(raw_read_3) if take3rdtestontime==1, by(campus_3 year_3 subject)
gen cl_prrank_3_ontime=((cl_rrank_3_ontime-1)/(num_class_ontime-1))

egen cl_mrank_3_ontime=rank(raw_math_3) if take3rdtestontime==1, by(campus_3 year_3 subject)
gen cl_pmrank_3_ontime=((cl_mrank_3_ontime-1)/(num_class_ontime-1))


* Subject-specific measure: math on subject 1 rows, reading on subject 2 rows
gen st_rank_3_ontime=st_mrank_3_ontime if subject==1
replace st_rank_3_ontime=st_rrank_3_ontime if subject==2 & st_rank_3_ontime==.

gen st_prank_3_ontime=st_pmrank_3_ontime if subject==1
replace st_prank_3_ontime=st_prrank_3_ontime if subject==2 & st_prank_3_ontime==.

gen cl_rank_3_ontime=cl_mrank_3_ontime if subject==1
replace cl_rank_3_ontime=cl_rrank_3_ontime if subject==2 & cl_rank_3_ontime==.

gen cl_prank_3_ontime=cl_pmrank_3_ontime if subject==1
replace cl_prank_3_ontime=cl_prrank_3_ontime if subject==2 & cl_prank_3_ontime==.

label var st_mrank_3_ontime "Math Rank in State Yr 3, Ontime"
label var st_rrank_3_ontime "Reading Rank in State Yr 3, Ontime"
label var st_pmrank_3_ontime "Math Percentile Rank in State Yr 3, Ontime"
label var st_prrank_3_ontime "Reading Percentile Rank in State Yr 3, Ontime"
label var st_rank_3_ontime "Rank in State Yr 3, Ontime"
label var st_prank_3_ontime "Percentile in State Yr 3, Ontime"

label var cl_mrank_3_ontime "Math Rank in Class Yr 3, Ontime"
label var cl_rrank_3_ontime "Reading Rank in Class Yr 3, Ontime"
label var cl_pmrank_3_ontime "Math Percentile Rank in Class Yr 3, Ontime"
label var cl_prrank_3_ontime "Reading Percentile Rank in Class Yr 3, Ontime"
label var cl_rank_3_ontime "Rank in Class Yr 3, Ontime"
label var cl_prank_3_ontime "Percentile in in Class Yr 3, Ontime"

* Drop the rank variables carried in from the yearly files; they are replaced
* by the measures built in this file (each name listed once)
drop morder_3 rorder_3 state_perc_3 morder_class_3 rorder_class_3 ordinal_rank_3

********************************************************************************
*Test scores and rank based on all students 
********************************************************************************
* For the state (st) and class (cl) level, rank reading then math within the
* cell and turn each rank into a percentile: (rank-1)/(cell size-1).
* egen rank() gives tied scores their average rank.
foreach scope in st cl {
	local cell year_3 subject
	local size num_cohort_3_all
	if "`scope'"=="cl" {
		local cell campus_3 year_3 subject
		local size num_class_3_all
	}
	foreach s in r m {
		local score raw_read_3
		if "`s'"=="m" local score raw_math_3
		egen `scope'_`s'rank_3_all=rank(`score'), by(`cell')
		gen `scope'_p`s'rank_3_all=((`scope'_`s'rank_3_all-1)/(`size'-1))
	}
}

* Subject-specific measure: the math value on subject 1 rows, the reading
* value on subject 2 rows, missing on any other row
gen st_rank_3_all=cond(subject==1,st_mrank_3_all,st_rrank_3_all) if inlist(subject,1,2)
gen st_prank_3_all=cond(subject==1,st_pmrank_3_all,st_prrank_3_all) if inlist(subject,1,2)
gen cl_rank_3_all=cond(subject==1,cl_mrank_3_all,cl_rrank_3_all) if inlist(subject,1,2)
gen cl_prank_3_all=cond(subject==1,cl_pmrank_3_all,cl_prrank_3_all) if inlist(subject,1,2)


* These two labels are replaced again further down in this list
label var st_prank_3_all "Percentile Rank in State Yr3, All Students"
label var cl_prank_3_all "Percentile Rank in State Yr3"

label var st_mrank_3_all "Math Rank in State Yr 3, All Students"
label var st_rrank_3_all "Reading Rank in State Yr 3, All Students"
label var st_pmrank_3_all "Math Percentile Rank in State Yr 3, All Students"
label var st_prrank_3_all "Reading Percentile Rank in State Yr 3, All Students"
label var st_rank_3_all "Rank in State Yr 3, All Students"
label var st_prank_3_all "Percentile in State Yr 3, All Students"

label var cl_mrank_3_all "Math Rank in Class Yr 3, All Students"
label var cl_rrank_3_all "Reading Rank in Class Yr 3, All Students"
label var cl_pmrank_3_all "Math Percentile Rank in Class Yr 3, All Students"
label var cl_prrank_3_all "Reading Percentile Rank in Class Yr 3, All Students"
label var cl_rank_3_all "Rank in Class Yr 3, All Students"
label var cl_prank_3_all "Percentile in in Class Yr 3, All Students"


********************************************************************************
*Test scores and rank based on all students 
*All students - bottom rank
********************************************************************************
* Within each cell rows are sorted by score and numbered; a row with the same
* score as the row before it then takes that row's number, so every tied
* student gets the lowest position of the tie. The percentile divides by the
* cell's full row count minus one (rows with a missing score included).

* State level: cell = subject by year
sort subject year_3 raw_read_3
by subject year_3:gen st_rrank_3_bottom=_n if raw_read_3!=.
by subject year_3:replace st_rrank_3_bottom=st_rrank_3_bottom[_n-1] if raw_read_3==raw_read_3[_n-1] & raw_read_3!=.
by subject year_3: gen st_prrank_3_bottom=(st_rrank_3_bottom-1)/(_N-1)

sort subject year_3 raw_math_3
by subject year_3:gen st_mrank_3_bottom=_n if raw_math_3!=.
by subject year_3:replace st_mrank_3_bottom=st_mrank_3_bottom[_n-1] if raw_math_3==raw_math_3[_n-1] & raw_math_3!=.
by subject year_3: gen st_pmrank_3_bottom=(st_mrank_3_bottom-1)/(_N-1)

* Class level: cell = subject by year by campus.
* The tie step previously ran without the by prefix, so the first row of a
* campus could inherit the rank of the last row of the preceding campus when
* their scores were equal. It now stays inside the cell, as at state level.
sort subject year_3 campus_3 raw_read_3
by subject year_3 campus_3:gen cl_rrank_3_bottom=_n if raw_read_3!=.
by subject year_3 campus_3:replace cl_rrank_3_bottom=cl_rrank_3_bottom[_n-1] if raw_read_3==raw_read_3[_n-1] & raw_read_3!=.
by subject year_3 campus_3: gen cl_prrank_3_bottom=(cl_rrank_3_bottom-1)/(_N-1)

sort subject year_3 campus_3 raw_math_3
by subject year_3 campus_3:gen cl_mrank_3_bottom=_n if raw_math_3!=.
by subject year_3 campus_3:replace cl_mrank_3_bottom=cl_mrank_3_bottom[_n-1] if raw_math_3==raw_math_3[_n-1] & raw_math_3!=.
by subject year_3 campus_3: gen cl_pmrank_3_bottom=(cl_mrank_3_bottom-1)/(_N-1)

* Subject-specific measure: math on subject 1 rows, reading on subject 2 rows
gen st_rank_3_bottom=st_mrank_3_bottom if subject==1
replace st_rank_3_bottom=st_rrank_3_bottom if subject==2 & st_rank_3_bottom==.

gen st_prank_3_bottom=st_pmrank_3_bottom if subject==1
replace st_prank_3_bottom=st_prrank_3_bottom if subject==2 & st_prank_3_bottom==.

gen cl_rank_3_bottom=cl_mrank_3_bottom if subject==1
replace cl_rank_3_bottom=cl_rrank_3_bottom if subject==2 & cl_rank_3_bottom==.

gen cl_prank_3_bottom=cl_pmrank_3_bottom if subject==1
replace cl_prank_3_bottom=cl_prrank_3_bottom if subject==2 & cl_prank_3_bottom==.


label var st_mrank_3_bottom "State Math Rank State Yr 3, All Students Bottom Ties"
label var st_rrank_3_bottom "Reading Rank in State Yr 3, All Students Bottom Ties"
label var st_pmrank_3_bottom "Math Percentile Rank in State Yr 3, All Students Bottom Ties"
label var st_prrank_3_bottom "Reading Percentile Rank in State Yr 3, All Students Bottom Ties"
label var st_rank_3_bottom "Rank in State Yr 3, All Students Bottom Ties"
label var st_prank_3_bottom "Percentile in State Yr 3, All Students Bottom Ties"

label var cl_mrank_3_bottom "Math Rank in Class Yr 3, All Students Bottom Ties"
label var cl_rrank_3_bottom "Reading Rank in Class Yr 3, All Students Bottom Ties"
label var cl_pmrank_3_bottom "Math Percentile Rank in Class Yr 3, All Students Bottom Ties"
label var cl_prrank_3_bottom "Reading Percentile Rank in Class Yr 3, All Students Bottom Ties"
label var cl_rank_3_bottom "Rank in Class Yr 3, All Students Bottom Ties"
label var cl_prank_3_bottom "Percentile in in Class Yr 3, All Students Bottom Ties"


********************************************************************************
*Test scores and rank based on all students 
*All students - break ties randomly rank
********************************************************************************
* One uniform draw per row, with a fixed seed, orders students who share a score
set seed 007
gen uniform_tiebreaker=runiform()

* State level first (cell = subject by year), then class level (cell =
* subject by year by campus); reading before math within each level.
* Rank is the row position after sorting by score and the random draw;
* percentile = (rank-1)/(rows in cell - 1).
foreach scope in st cl {
	local cell subject year_3
	if "`scope'"=="cl" local cell `cell' campus_3
	foreach s in r m {
		local score raw_read_3
		if "`s'"=="m" local score raw_math_3
		sort `cell' `score' uniform_tiebreaker
		by `cell':gen `scope'_`s'rank_3_random=_n if `score'!=.
		by `cell': gen `scope'_p`s'rank_3_random=(`scope'_`s'rank_3_random-1)/(_N-1)
	}
}

* Subject-specific measure: the math value on subject 1 rows, the reading
* value on subject 2 rows, missing on any other row
gen st_rank_3_random=cond(subject==1,st_mrank_3_random,st_rrank_3_random) if inlist(subject,1,2)
gen st_prank_3_random=cond(subject==1,st_pmrank_3_random,st_prrank_3_random) if inlist(subject,1,2)
gen cl_rank_3_random=cond(subject==1,cl_mrank_3_random,cl_rrank_3_random) if inlist(subject,1,2)
gen cl_prank_3_random=cond(subject==1,cl_pmrank_3_random,cl_prrank_3_random) if inlist(subject,1,2)


label var st_mrank_3_random "State Math Rank State Yr 3, All Students Random Ties"
label var st_rrank_3_random "Reading Rank in State Yr 3, All Students Random Ties"
label var st_pmrank_3_random "Math Percentile Rank in State Yr 3, All Students Random Ties"
label var st_prrank_3_random "Reading Percentile Rank in State Yr 3, All Students Random Ties"
label var st_rank_3_random "Rank in State Yr 3, All Students Random Ties"
label var st_prank_3_random "Percentile in State Yr 3, All Students Random Ties"

label var cl_mrank_3_random "Math Rank in Class Yr 3, All Students Random Ties"
label var cl_rrank_3_random "Reading Rank in Class Yr 3, All Students Random Ties"
label var cl_pmrank_3_random "Math Percentile Rank in Class Yr 3, All Students Random Ties"
label var cl_prrank_3_random "Reading Percentile Rank in Class Yr 3, All Students Random Ties"
label var cl_rank_3_random "Rank in Class Yr 3, All Students Random Ties"
label var cl_prank_3_random "Percentile in in Class Yr 3, All Students Random Ties"



* Column layout: scores and the all-student and on-time measures follow
* num_class_ontime; demographics follow y8_sch_coh
order raw_math_3 raw_read_3 ///
	st_prank_3_all cl_prank_3_all ///
	st_rank_3_all st_mrank_3_all st_rrank_3_all st_pmrank_3_all st_prrank_3_all ///
	cl_rank_3_all cl_mrank_3_all cl_rrank_3_all cl_pmrank_3_all cl_prrank_3_all ///
	st_prank_3_ontime cl_prank_3_ontime ///
	st_rank_3_ontime st_mrank_3_ontime st_rrank_3_ontime st_pmrank_3_ontime st_prrank_3_ontime ///
	cl_rank_3_ontime cl_mrank_3_ontime cl_rrank_3_ontime cl_pmrank_3_ontime cl_prrank_3_ontime, ///
	after(num_class_ontime)

order ethnic fsme esl male fsme_dum esl_dum ethnic_dum, after( y8_sch_coh)


********************************************************************************
**************************8th Grade Testscores**********************************
********************************************************************************


* Raw grade 8 scores: take the first source variable, fall back to the second
* one where the first is system missing
gen raw_read_8=cond(r_raw_8==.,rawred_8,r_raw_8)
gen raw_math_8=cond(m_raw_8==.,rawmth_8,m_raw_8)

* Copy scores between a student's rows: a missing math score comes from the
* previous row and a missing reading score from the next row
sort id subject
by id: replace raw_math_8=raw_math_8[_n-1] if raw_math_8==. & raw_math_8[_n-1]!=.
by id: replace raw_read_8=raw_read_8[_n+1] if raw_read_8==. & raw_read_8[_n+1]!=.
label var raw_math_8 "Raw Math Score Y8 - only defined if ontime"
label var raw_read_8 "Raw Reading Score Y8 - only defined if ontime"

rename enr8_year year_8

* Temporary markers for rows with a non-missing grade 8 score
foreach subj in math read {
	gen tag_`subj'=1 if raw_`subj'_8!=.
}

* Number of rows with a score, by campus-year-subject and by year-subject
foreach subj in math read {
	egen num_class_8_nomis_`subj'=sum(tag_`subj'), by(campus_8 year_8 subject)
	label var num_class_8_nomis_`subj' "Total obs by sch year subject Y8 with our `subj' test measure"
	egen num_cohort_8_nomis_`subj'=sum(tag_`subj'), by(year_8 subject)
	label var num_cohort_8_nomis_`subj' "Total obs by year subject Y8 with our `subj' test measure"
}

drop tag_math tag_read

* The counts are undefined for rows without a grade 8 campus
foreach cnt of varlist num_class_8_nomis_math num_cohort_8_nomis_math ///
	num_class_8_nomis_read num_cohort_8_nomis_read {
	replace `cnt'=. if campus_8==""
}


* Grade 8 ranks among rows that have a grade 8 campus. State level first, then
* class level; reading before math. egen rank() gives tied scores their
* average rank. Percentile = (rank-1)/(rows with a score in the cell - 1).
foreach scope in st cl {
	local cell year_8 subject
	local size num_cohort_8_nomis
	if "`scope'"=="cl" {
		local cell campus_8 year_8 subject
		local size num_class_8_nomis
	}
	foreach subj in read math {
		local s=substr("`subj'",1,1)
		egen `scope'_`s'rank_8_nomis=rank(raw_`subj'_8) if campus_8!="", by(`cell')
		gen `scope'_p`s'rank_8_nomis=((`scope'_`s'rank_8_nomis-1)/(`size'_`subj'-1))
	}
}

* Subject-specific measure: the math value on subject 1 rows, the reading
* value on subject 2 rows; missing without a grade 8 campus
gen st_rank_8_nomis=cond(subject==1,st_mrank_8_nomis,st_rrank_8_nomis) if inlist(subject,1,2) & campus_8!=""
gen st_prank_8_nomis=cond(subject==1,st_pmrank_8_nomis,st_prrank_8_nomis) if inlist(subject,1,2) & campus_8!=""
gen cl_rank_8_nomis=cond(subject==1,cl_mrank_8_nomis,cl_rrank_8_nomis) if inlist(subject,1,2) & campus_8!=""
gen cl_prank_8_nomis=cond(subject==1,cl_pmrank_8_nomis,cl_prrank_8_nomis) if inlist(subject,1,2) & campus_8!=""


* These two labels are replaced again further down in this list
label var st_prank_8_nomis "Percentile Rank in State Yr8, Non-missing Students"
label var cl_prank_8_nomis "Percentile Rank in State Yr8, Non-missing Students"

label var st_mrank_8_nomis "Math Rank in State Yr8, Non-missing Students"
label var st_rrank_8_nomis "Reading Rank in State Yr8, Non-missing Students"
label var st_pmrank_8_nomis "Math Percentile Rank in State Yr8, Non-missing Students"
label var st_prrank_8_nomis "Reading Percentile Rank in State Yr8, Non-missing Students"
label var st_rank_8_nomis "Rank in State Yr8, Non-missing Students"
label var st_prank_8_nomis "Percentile in State Yr8, Non-missing Students"

label var cl_mrank_8_nomis "Math Rank in Class Yr8, Non-missing Students"
label var cl_rrank_8_nomis "Reading Rank in Class Yr8, Non-missing Students"
label var cl_pmrank_8_nomis "Math Percentile Rank in Class Yr8, Non-missing Students"
label var cl_prrank_8_nomis "Reading Percentile Rank in Class Yr8, Non-missing Students"
label var cl_rank_8_nomis "Rank in Class Yr8, Non-missing Students"
label var cl_prank_8_nomis "Percentile in in Class Yr8, Non-missing Students"


* Put the math (reading) class rank carried in from the yearly files on both
* of a student's rows: math is copied down from the subject 1 row, reading is
* copied up from the subject 2 row
sort id subject
gen ordinal_mrank_8=cond(subject==1,ordinal_rank_8,.)
by id:replace ordinal_mrank_8=ordinal_mrank_8[_n-1] if ordinal_mrank_8==. & ordinal_mrank_8[_n-1]!=.

gen ordinal_rrank_8=cond(subject==2,ordinal_rank_8,.)
by id:replace ordinal_rrank_8=ordinal_rrank_8[_n+1] if ordinal_rrank_8==. & ordinal_rrank_8[_n+1]!=.

label var state_perc_8 "Grade 8 State Percentile Rank Amoung all students"
label var ordinal_rank_8 "Grade 8 Class Percentile Rank Amoung all students"

label var ordinal_rrank_8 "Grade 8 Class Reading Percentile Rank Amoung all students"
label var ordinal_mrank_8 "Grade 8 Class Math Percentile Rank Amoung all students"

********************************************************************************
********************************************************************************


des *cip*

* major_code holds the first two characters of a four-year major code. Years
* 0 to 3 are scanned in order, fall before spring, and each usable code
* overwrites the previous one, so the last usable code is the one kept.
* Seven-character codes get a leading zero; "." and "99999999" are skipped.
gen major_code=""
foreach yr in 0 1 2 3 {
	foreach term in fall sprg {
		local cip major_`term'`yr'
		tostring(stucip_4yr_`term'_`yr'), gen(`cip')
		replace `cip'="0"+`cip' if strlen(`cip')==7
		replace major_code=substr(`cip',1,2) if !inlist(`cip',".","99999999")
	}
}

* STEM_major is 1 for two-digit codes 14, 26, 27 and 40 and 0 for anything else
gen STEM_major=inlist(major_code,"14","26","27","40")
* NOTE(review): major_code is never "." here (it starts empty and "." codes
* are skipped above), so this line changes nothing and students without any
* major code keep STEM_major==0. Confirm whether they should be missing.
replace STEM_major=. if major_code=="."

drop major_fall* major_sprg*

* uni_code: first non-missing institution code, scanning years 0 to 3, fall
* before spring, two-year before four-year
gen uni_code=.
foreach yr in 0 1 2 3 {
	foreach term in fall sprg {
		foreach len in 2 4 {
			replace uni_code= stufice_`len'yr_`term'_`yr' if uni_code==.
		}
	}
}

* college_type: type of the first enrollment found in the same scan order
* (two-year is checked before four-year within a term)
gen college_type=.
foreach yr in 0 1 2 3 {
	foreach term in fall sprg {
		replace college_type=1 if enr2yr_`term'_`yr'==1 & college_type==.
		replace college_type=2 if enr4yr_`term'_`yr'==1 & college_type==.
	}
}
label var college_type "2 or 4 year college -matches uni code"
label define college_type 1 "2Year" 2 "4year"
label values college_type college_type

* Class composition in grade 3 (class = campus by year by subject).
* pro_X_all is the class share with the characteristic; pro_oth_X_all is the
* share among the student's classmates, leaving the student out.
egen tot_fsme= sum(fsme_dum), by(campus_3 year_3 subject)
gen pro_fsme_all=tot_fsme/num_class_3_all
label var pro_fsme_all "Proportion of Class Yr 3 FSME, All Sudents"
gen pro_oth_fsme_all=(tot_fsme-fsme_dum)/(num_class_3_all-1)
label var pro_oth_fsme_all "Proportion of others in Class Yr 3 FSME, All Sudents"
	
egen tot_ethnic= sum(ethnic_dum), by(sub_year_school)
gen pro_ethnic_all=tot_ethnic/num_class_3_all
label var pro_ethnic_all "Proportion of Class Yr 3 Ethnic, All Sudents"
gen pro_oth_ethnic_all=(tot_ethnic-ethnic_dum)/(num_class_3_all-1)
* This label was previously written onto pro_ethnic_all, overwriting its own
* label and leaving pro_oth_ethnic_all unlabelled
label var pro_oth_ethnic_all "Proportion of others in Class Yr 3 Ethnic, All Sudents"

egen tot_esl= sum(esl_dum), by(sub_year_school)
gen pro_esl_all=tot_esl/num_class_3_all
label var pro_esl_all "Proportion of Class Yr 3 ESL, All Sudents"
gen pro_oth_esl_all=(tot_esl-esl_dum)/(num_class_3_all-1)
label var pro_oth_esl_all "Proportion of others in Class Yr 3 ESL, All Sudents"
 

* Within-class standard deviation of the state percentile rank
sort y3_sch_coh_sub
by  y3_sch_coh_sub: egen class_sd=sd(st_prank_3_all)
* This label was previously written onto pro_oth_esl_all, replacing that
* variable's label and leaving class_sd unlabelled
label var class_sd "SD in Yr3 Class Test Scores, All Sudents"


* Cap these AP variables at 1 so they act as indicators
replace ap_calcab=1 if ap_calcab>1 & ap_calcab!=.
replace ap_sci=1 if ap_sci>1 & ap_sci!=.
replace ap_eng=1 if ap_eng>1 & ap_eng!=.

* enrcollege is the number of college types enrolled in (0, 1 or 2):
* any_college flags at least one, both_college flags both
gen any_college=enrcollege
replace any_college=1 if any_college==2

gen both_college=1 if enrcollege==2
replace both_college=0 if enrcollege==0|enrcollege==1


* Quantile bins of the grade 3 percentile ranks: 20 bins for the vent_
* variables, 100 bins for the st_prank_int_ variables
xtile vent_cl_prank_all=cl_prank_3_all, n(20)
xtile vent_st_prank_all=st_prank_3_all, n(20)
xtile st_prank_int_all=st_prank_3_all, n(100)

xtile vent_cl_prank_bottom=cl_prank_3_bottom, n(20)
xtile vent_st_prank_bottom=st_prank_3_bottom, n(20)

xtile vent_cl_prank_random=cl_prank_3_random, n(20)
xtile vent_st_prank_random=st_prank_3_random, n(20)


* One indicator variable per bin
ta vent_cl_prank_all, gen(vent_cl_prank_all_)
ta vent_st_prank_all, gen(vent_st_prank_all_)

xtile vent_cl_prank_ontime=cl_prank_3_ontime, n(20)
xtile vent_st_prank_ontime=st_prank_3_ontime, n(20)
xtile st_prank_int_ontime=st_prank_3_ontime, n(100)

ta vent_cl_prank_ontime, gen(vent_cl_prank_)
ta vent_st_prank_ontime, gen(vent_st_prank_)



* Grade 8 bins from the ranks carried in from the yearly files.
* The second xtile had no new-variable name, which is a syntax error; the
* name is taken from the tabulate two lines below, which expects
* vent_st_prank_8_all.
xtile vent_cl_prank_8_all=ordinal_rank_8, n(20)
xtile vent_st_prank_8_all=state_perc_8, n(20)

ta vent_cl_prank_8_all, gen(vent_cl_prank_8_all_)
ta vent_st_prank_8_all, gen(vent_st_prank_8_all_)

 

* Grade 8 bins from the ranks computed among students with a grade 8 score
xtile vent_cl_prank_8_ontime=cl_prank_8_nomis, n(20)
xtile vent_st_prank_8_ontime=st_prank_8_nomis, n(20)

ta vent_cl_prank_8_ontime, gen(vent_cl_prank_8_nomiss)
ta vent_st_prank_8_ontime, gen(vent_st_prank_8_nomiss)


*Getting consistent sample
* Every control must be observed
drop if male==. | fsme_dum==. | esl_dum==. | ethnic_dum==.

* Grade 3 rank and both raw scores must be observed
drop if st_prank_3_all==. | raw_read_3==. | raw_math_3==.

* On-time test takers only, in classes with at least 10 on-time students
drop if take3rdtestontime==0
drop if num_class_ontime<10

*Drop duplicates
* Keep students with exactly two rows whose subject codes sum to 3
* (one subject 1 row and one subject 2 row)
sort id2 subject
by id2: gen n_rows=_N
drop if n_rows!=2
drop n_rows
by id2: egen subject_total=sum(subject)
drop if subject_total!=3
drop subject_total

save $ui_dir/pre_analysis,replace